Chapter 7 Community composition
7.1 Metagenomics
7.1.1 Taxonomy overview
Number of Archaea phyla
genome_metadata %>%
filter(domain == "Archaea")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()0
Number of Bacteria phyla
genome_metadata %>%
filter(domain == "Bacteria")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()15
7.1.1.1 Phylum level
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(~factor(Species, labels=c("Eb" = "Eptesicus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,Species, Region) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(Total_mean=mean(relabun*100, na.rm=T),
Total_sd=sd(relabun*100, na.rm=T),
Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Total_meta=str_c(round(Total_mean,3),"±",round(Total_sd,3)),
Eptesicus_meta=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo_meta=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus_meta=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Eb_mean)%>%
dplyr::select(phylum,Total_meta,Eptesicus_meta,Hypsugo_meta,Pipistrellus_meta)# A tibble: 15 × 5
phylum Total_meta Eptesicus_meta Hypsugo_meta Pipistrellus_meta
<chr> <chr> <chr> <chr> <chr>
1 Pseudomonadota 68.255±37.904 63.683±35.64 89.049±29.599 52.374±38.209
2 Bacillota 16.181±29.125 15.147±28.225 5.375±19.493 25.984±33.849
3 Desulfobacterota 3.981±10.582 7.227±13.106 0±0 5.944±13.024
4 Bacteroidota 6.774±17.384 5.695±9.903 5.263±22.942 8.569±14.844
5 Bacillota_A 1.545±4.266 2.215±4.627 0±0 2.576±5.538
6 Fusobacteriota 0.694±1.785 1.818±3.061 0±0 0.781±1.589
7 Campylobacterota 1.288±6.836 1.731±2.428 0±0 2.198±10.309
8 Elusimicrobiota 0.141±0.755 0.72±1.643 0±0 0±0
9 Synergistota 0.405±1.282 0.562±1.116 0±0 0.684±1.771
10 Planctomycetota 0.086±0.454 0.44±0.985 0±0 0±0
11 Deferribacterota 0.08±0.4 0.407±0.861 0±0 0±0
12 Bacillota_C 0.136±0.36 0.354±0.531 0±0 0.154±0.385
13 Actinomycetota 0.075±0.534 0±0 0.201±0.876 0±0
14 Cyanobacteriota 0.318±1.537 0±0 0±0 0.737±2.302
15 Spirochaetota 0.041±0.296 0±0 0.111±0.486 0±0
7.1.1.2 Family level
Percentange of families in each group
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family, Species) %>%
summarise(relabun=sum(count))family_summary %>%
group_by(family) %>%
summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Eb_mean, -Ha_mean) %>%
dplyr::select(family,Eptesicus,Hypsugo,Pipistrellus)# A tibble: 55 × 4
family Eptesicus Hypsugo Pipistrellus
<chr> <chr> <chr> <chr>
1 Diplorickettsiaceae 33.448±40.33 24.089±41.769 10.097±24.365
2 Enterobacteriaceae 17.987±30.59 10.107±20.975 18.469±28.031
3 Mycoplasmataceae 8.829±18.638 4.422±19.277 0±0
4 Vibrionaceae 7.534±23.723 5.863±11.437 5.576±18.175
5 Desulfovibrionaceae 6.023±11.116 0±0 4.561±9.963
6 Enterococcaceae 4.692±10.65 0.602±2.624 3.243±11.687
7 Dysgonomonadaceae 2.721±6.637 0±0 3.84±7.171
8 Aeromonadaceae 1.626±4.965 5.844±13.778 0±0
9 Leptotrichiaceae 1.611±3.041 0±0 0.733±1.597
10 Helicobacteraceae 1.544±2.452 0±0 2.198±10.309
# ℹ 45 more rows
family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
# Per environment
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~Species)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")7.1.1.3 Genus level
Percetange of genera in each group
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,genus, Species) %>%
summarise(relabun=sum(count))
genus_summary %>%
group_by(genus) %>%
summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Eb_mean, -Ha_mean) %>%
dplyr::select(genus,Eptesicus,Hypsugo,Pipistrellus)# A tibble: 72 × 4
genus Eptesicus Hypsugo Pipistrellus
<chr> <chr> <chr> <chr>
1 Aquirickettsiella 33.448±40.33 24.089±41.769 10.097±24.365
2 Spiroplasma 8.747±18.476 0±0 0±0
3 Vibrio 7.534±23.723 5.863±11.437 5.576±18.175
4 Jejubacter 4.268±9 0±0 0±0
5 Escherichia 3.416±10.802 0±0 0.317±1.488
6 Pseudocitrobacter 3.293±9.507 0±0 0.006±0.028
7 Frigididesulfovibrio 2.803±4.353 0±0 1.129±2.876
8 Enterococcus 2.769±5.838 0.602±2.624 3.243±11.687
9 Dysgonomonas 2.721±6.637 0±0 3.84±7.171
10 Serratia 2.389±7.462 6.416±19.813 2.331±10.5
# ℹ 62 more rows
genus_summary_sort <- genus_summary %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary %>%
mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
geom_jitter(alpha=0.5) +
facet_grid(.~Species)+
theme_minimal() +
theme(axis.text.y = element_text(size=6))+
labs(y="Genera", x="Relative abundance", color="Phylum")Number of mags and distinct taxonomy
bats=c("Eb", "Pk", "Ha")
total_mags <- data.frame(
Bat = character(),
MAGs = numeric(),
Phylum = numeric(),
Family = numeric(),
Genus = numeric()
)
preabs_table <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata, by=join_by("genome"=="genome"))
phylum <- preabs_table %>%
distinct(phylum)
family <- preabs_table %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
distinct(phylum, class, order, family, genus)
total_mags <- rbind(
total_mags,
data.frame(
Bat = "Total",
MAGs = nrow(preabs_table),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(.>=1)
phylum <- preabs_table %>%
select({{bat}}, phylum) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum)
family <- preabs_table %>%
select({{bat}}, phylum, class, order, family) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum, class, order, family, genus)
total_mags <- rbind(
total_mags,
data.frame(
Bat = bat,
MAGs = nrow(number),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
}bats=c("Eb", "Pk", "Ha")
no_annotation <- data.frame(
Bat = character(),
No_genus = numeric(),
No_species = numeric()
)
preabs_table <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata, by=join_by("genome"=="genome"))
genus <- preabs_table %>%
filter(genus == "")
species <- preabs_table %>%
filter(species == "")
no_annotation <- rbind(
no_annotation,
data.frame(
Bat = "Total",
No_genus = nrow(genus),
No_species = nrow(species)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(.>=1)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat)>=1) %>%
filter(genus == "")
species <- preabs_table %>%
filter(!!sym(bat)>=1) %>%
filter(species == "")
no_annotation <- rbind(
no_annotation,
data.frame(
Bat = bat,
No_genus = nrow(genus),
No_species = nrow(species)
)
)
}Total percentage of MAGs without genus-level annotation
nongenera <- genome_metadata %>%
filter(genus == "") %>%
summarize(Mag_nogenera = n()) %>%
pull()
nmags <- total_mags %>%
filter(Bat=="Total") %>%
select(MAGs) %>%
pull()
perct <- nongenera*100/nmags
cat(perct)20.74074
Percentage of MAGs without genus-level annotation by phylum
total_mag_phylum <- genome_metadata %>%
group_by(phylum) %>%
summarize(Total_MAGs = n())
genome_metadata %>%
filter(genus == "") %>%
group_by(phylum) %>%
summarize(MAGs_nogenus = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(Percentage_nogenus=100*MAGs_nogenus/Total_MAGs) %>%
tt()| phylum | MAGs_nogenus | Total_MAGs | Percentage_nogenus |
|---|---|---|---|
| Bacillota | 2 | 14 | 14.285714 |
| Bacillota_A | 8 | 19 | 42.105263 |
| Bacillota_C | 1 | 1 | 100.000000 |
| Bacteroidota | 1 | 19 | 5.263158 |
| Campylobacterota | 1 | 3 | 33.333333 |
| Deferribacterota | 2 | 2 | 100.000000 |
| Pseudomonadota | 12 | 51 | 23.529412 |
| Synergistota | 1 | 1 | 100.000000 |
Number of bacterial species
genome_metadata %>%
filter(domain == "Bacteria")%>%
dplyr::select(species) %>%
unique() %>%
pull() %>%
length() %>%
cat()37
Total percentage of MAGs without species-level annotation
nonspecies <- genome_metadata %>%
filter(species == "") %>%
summarize(Mag_nospecies = n()) %>%
pull()
perct <- nonspecies*100/nmags
cat(perct)72.59259
MAGs without species-level annotation
total_mag_phylum <- genome_metadata %>%
group_by(phylum) %>%
summarize(MAGs_total = n())
genome_metadata %>%
filter(species == "") %>%
group_by(phylum) %>%
summarize(MAGs_nospecies = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(species_annotated=MAGs_total-MAGs_nospecies) %>%
mutate(Percentage_nospecies=100*MAGs_nospecies/MAGs_total) %>%
mutate(Percentage_species=100-100*MAGs_nospecies/MAGs_total)%>%
tt()| phylum | MAGs_nospecies | MAGs_total | species_annotated | Percentage_nospecies | Percentage_species |
|---|---|---|---|---|---|
| Actinomycetota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Bacillota | 10 | 14 | 4 | 71.42857 | 28.571429 |
| Bacillota_A | 18 | 19 | 1 | 94.73684 | 5.263158 |
| Bacillota_C | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Bacteroidota | 13 | 19 | 6 | 68.42105 | 31.578947 |
| Campylobacterota | 3 | 3 | 0 | 100.00000 | 0.000000 |
| Cyanobacteriota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| Deferribacterota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| Desulfobacterota | 14 | 14 | 0 | 100.00000 | 0.000000 |
| Elusimicrobiota | 4 | 4 | 0 | 100.00000 | 0.000000 |
| Fusobacteriota | 1 | 2 | 1 | 50.00000 | 50.000000 |
| Planctomycetota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Pseudomonadota | 27 | 51 | 24 | 52.94118 | 47.058824 |
| Synergistota | 1 | 1 | 0 | 100.00000 | 0.000000 |
bats=c("Eb", "Pk", "Ha")
# Initialize an empty results data frame
single_sp <- data.frame(
Bat = character(),
Single_species = numeric()
)
table_upset_analysis <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample")
unique_all <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1)
single_sp <- rbind(
single_sp,
data.frame(
Bat = "Total", # Label for the total value
Single_species = nrow(unique_all) # Aggregate sum of column sums
)
)
for (bat in bats) {
unique <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1) %>%
select(all_of(bat)) %>%
filter(.>0) %>%
nrow()
# Add results to the results data frame
single_sp <- rbind(
single_sp,
data.frame(
Bat = bat,
Single_species = unique # Aggregate sum of column sums
)
)
}single_ind <- data.frame(
Bat = character(),
Single_individual = numeric()
)
freq_table <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("asv")
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>% # Calculate row sum for specific columns
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
filter(row_sum==1)
single_ind <- rbind(
single_ind,
data.frame(
Bat = "Total",
Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
)
)
for (bat in bats) {
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
select(bat) %>%
filter(.==1)
single_ind <- rbind(
single_ind,
data.frame(
Bat = bat,
Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
)
)
}7.2 Summary table
summary_table <- total_mags %>%
left_join(., no_annotation, by="Bat") %>%
left_join(., single_ind, by="Bat") %>%
left_join(., single_sp, by="Bat")
summary_table Bat MAGs Phylum Family Genus No_genus No_species Single_individual Single_species
1 Total 135 15 58 91 28 98 34 82
2 Eb 92 12 43 63 20 73 14 49
3 Pk 69 10 38 54 12 44 11 19
4 Ha 30 5 17 24 5 13 9 14
summary_table %>%
select(-Phylum,-Family, -Genus) %>%
rowwise() %>%
mutate(No_genus_perc=No_genus*100/MAGs)%>%
mutate(No_species_perc=No_species*100/MAGs) %>%
mutate(Single_individual_perc=Single_individual*100/MAGs)%>%
mutate(Single_species_perc=Single_species*100/MAGs) %>%
mutate(Single_individual_per_Single_species=Single_individual*100/Single_species) %>%
select(1,6:11)# A tibble: 4 × 7
# Rowwise:
Bat Single_species No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_Single_species
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Total 82 20.7 72.6 25.2 60.7 41.5
2 Eb 49 21.7 79.3 15.2 53.3 28.6
3 Pk 19 17.4 63.8 15.9 27.5 57.9
4 Ha 14 16.7 43.3 30 46.7 64.3
7.3 Amplicon
7.3.1 Taxonomy overview
genome_metadata <- genome_metadata %>%
filter(!is.na(phylum))
genome_counts_filt <- genome_counts_filt %>%
filter(genome %in% genome_metadata$genome)%>%
mutate_at(vars(-genome),~./sum(.))%>%
column_to_rownames(., "genome") %>%
.[,colSums(.)>0.00005] %>%
filter(rowSums(across(everything())) != 0) %>%
rownames_to_column(., "genome")
genome_metadata <- genome_metadata %>%
filter(genome %in% genome_counts_filt$genome)7.3.1.1 Phylum level
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(~factor(Species, labels=c("Eb" = "Eptesicus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")), scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,Species) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(Total_mean=mean(relabun*100, na.rm=T),
Total_sd=sd(relabun*100, na.rm=T),
Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(Total_mean,3),"±",round(Total_sd,3)),
Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Eb_mean)%>%
dplyr::select(phylum,Total,Eptesicus,Hypsugo,Pipistrellus)# A tibble: 30 × 5
phylum Total Eptesicus Hypsugo Pipistrellus
<chr> <chr> <chr> <chr> <chr>
1 Pseudomonadota 58.806±25.926 53.499±30.532 67.515±18.835 53.697±28.06
2 Bacillota 26.442±20.944 25.488±23.748 22.918±16.066 29.919±23.604
3 Bacteroidota 4.876±7.757 5.588±8.456 5.332±8.486 4.158±7.066
4 Fusobacteriota 1.831±4.276 5.054±6.787 0.382±0.927 1.618±4.021
5 Desulfobacterota 2.107±4.594 4.449±5.799 0.327±0.938 2.58±5.419
6 Patescibacteria 0.48±2.558 2.198±5.658 0.052±0.151 0.069±0.295
7 Rs-K70 termite group 0.946±2.852 1.378±3.222 0.372±1.304 1.246±3.603
8 Synergistota 0.364±0.935 0.671±1.257 0.167±0.704 0.394±0.948
9 Planctomycetota 0.177±0.475 0.602±0.815 0.016±0.059 0.123±0.371
10 Campylobacterota 1.088±6.268 0.528±0.911 0.265±0.675 2.053±9.542
# ℹ 20 more rows
7.3.1.2 Family level
Percentange of families in each group
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family, Species) %>%
summarise(relabun=sum(count))family_summary %>%
group_by(family) %>%
summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Pk_mean) %>%
dplyr::select(family,Eptesicus,Hypsugo,Pipistrellus) %>%
left_join(., genome_metadata[c(3,6)] %>% unique(), by=join_by(family==family))# A tibble: 285 × 5
family Eptesicus Hypsugo Pipistrellus phylum
<chr> <chr> <chr> <chr> <chr>
1 Enterobacteriaceae 26.677±31.648 8.829±13.578 13.234±22.962 Pseudomonadota
2 Morganellaceae 0.286±0.614 4.136±6.821 9.526±19.046 Pseudomonadota
3 Yersiniaceae 0.223±0.478 9.419±14.295 6.631±18.115 Pseudomonadota
4 Enterococcaceae 6.975±10.452 2.884±4.689 5.421±10.609 Bacillota
5 Streptococcaceae 0.518±0.904 2.154±3.462 4.843±15.641 Bacillota
6 Diplorickettsiaceae 14.235±27.678 0.202±0.498 4.667±20.883 Pseudomonadota
7 Bacillaceae 0.463±0.701 7.36±9.57 4.639±14.907 Bacillota
8 Lachnospiraceae 5.118±11.827 2.331±5.594 4.137±8.837 Bacillota
9 Rickettsiaceae 0.048±0.147 6.597±15.049 4.097±13.242 Pseudomonadota
10 Pasteurellaceae 0.443±1.394 4.538±9.865 3.519±6.118 Pseudomonadota
# ℹ 275 more rows
family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
# Per environment
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~Species)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")7.3.1.3 Genus level
Percetange of genera in each group
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,genus, Species) %>%
summarise(relabun=sum(count))
genus_summary %>%
group_by(genus) %>%
summarise(Eb_mean=mean(relabun[Species=="Eb"]*100, na.rm=T),
Eb_sd=sd(relabun[Species=="Eb"]*100, na.rm=T),
Ha_mean=mean(relabun[Species=="Ha"]*100, na.rm=T),
Ha_sd=sd(relabun[Species=="Ha"]*100, na.rm=T),
Pk_mean=mean(relabun[Species=="Pk"]*100, na.rm=T),
Pk_sd=sd(relabun[Species=="Pk"]*100, na.rm=T)) %>%
mutate(Eptesicus=str_c(round(Eb_mean,3),"±",round(Eb_sd,3)),
Hypsugo=str_c(round(Ha_mean,3),"±",round(Ha_sd,3)),
Pipistrellus=str_c(round(Pk_mean,3),"±",round(Pk_sd,3))) %>%
arrange(-Pk_mean) %>%
dplyr::select(genus,Eptesicus,Hypsugo,Pipistrellus) %>%
left_join(., genome_metadata[c(3,7)] %>% unique(), by=join_by(genus==genus))# A tibble: 565 × 5
genus Eptesicus Hypsugo Pipistrellus phylum
<chr> <chr> <chr> <chr> <chr>
1 Serratia 0.223±0.478 9.05±14.401 6.493±18.155 Pseudomonadota
2 Morganella 0.015±0.033 2.013±6.097 5.672±14.831 Pseudomonadota
3 Enterococcus 6.975±10.452 2.884±4.688 5.418±10.61 Bacillota
4 Lactococcus 0.514±0.897 2.037±3.503 4.83±15.636 Bacillota
5 Bacillus 0.437±0.69 7.274±9.497 4.625±14.907 Bacillota
6 Rickettsiella 14.234±27.674 0.11±0.439 4.463±20.908 Pseudomonadota
7 Rickettsia 0.048±0.147 6.597±15.049 4.097±13.242 Pseudomonadota
8 Vibrio 3.254±10.076 5.492±8.443 3.046±11.078 Pseudomonadota
9 Enterobacter 15.407±26.424 4.97±10.782 2.813±6.493 Pseudomonadota
10 Lachnoclostridium 3.64±9.837 0.525±2.014 2.806±7.312 Bacillota
# ℹ 555 more rows
genus_summary_sort <- genus_summary %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary %>%
mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
geom_jitter(alpha=0.5) +
facet_grid(.~Species)+
theme_minimal() +
theme(axis.text.y = element_text(size=6))+
labs(y="Genera", x="Relative abundance", color="Phylum")Number of mags and distinct taxonomy
bats=c("Eb", "Pk", "Ha")
total_mags <- data.frame(
Bat = character(),
MAGs = numeric(),
Phylum = numeric(),
Family = numeric(),
Genus = numeric()
)
preabs_table <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata, by=join_by("genome"=="genome"))
phylum <- preabs_table %>%
distinct(phylum)
family <- preabs_table %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
distinct(phylum, class, order, family, genus)
total_mags <- rbind(
total_mags,
data.frame(
Bat = "Total",
MAGs = nrow(preabs_table),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(.>=1)
phylum <- preabs_table %>%
select({{bat}}, phylum) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum)
family <- preabs_table %>%
select({{bat}}, phylum, class, order, family) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat)>=1) %>%
distinct(phylum, class, order, family, genus)
total_mags <- rbind(
total_mags,
data.frame(
Bat = bat,
MAGs = nrow(number),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
}bats=c("Eb", "Pk", "Ha")
no_annotation <- data.frame(
Bat = character(),
No_genus = numeric(),
No_species = numeric()
)
preabs_table <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata, by=join_by("genome"=="genome"))
genus <- preabs_table %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(is.na(species))
no_annotation <- rbind(
no_annotation,
data.frame(
Bat = "Total",
No_genus = nrow(genus),
No_species = nrow(species)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(.>=1)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat)>=1) %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(!!sym(bat)>=1) %>%
filter(is.na(species))
no_annotation <- rbind(
no_annotation,
data.frame(
Bat = bat,
No_genus = nrow(genus),
No_species = nrow(species)
)
)
}Total percentage of MAGs without genus-level annotation
nongenera <- genome_metadata %>%
filter(is.na(genus)) %>%
summarize(Mag_nogenera = n()) %>%
pull()
nmags <- total_mags %>%
filter(Bat=="Total") %>%
select(MAGs) %>%
pull()
perct <- nongenera*100/nmags
cat(perct)32.13354
Percentage of MAGs without genus-level annotation by phylum
total_mag_phylum <- genome_metadata %>%
group_by(phylum) %>%
summarize(Total_MAGs = n())
genome_metadata %>%
filter(is.na(genus)) %>%
group_by(phylum) %>%
summarize(MAGs_nogenus = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(Percentage_nogenus=100*MAGs_nogenus/Total_MAGs) %>%
tt()| phylum | MAGs_nogenus | Total_MAGs | Percentage_nogenus |
|---|---|---|---|
| Actinomycetota | 72 | 251 | 28.685259 |
| Apal-E12 | 1 | 1 | 100.000000 |
| Armatimonadota | 1 | 1 | 100.000000 |
| Bacillota | 504 | 1334 | 37.781109 |
| Bacteroidota | 92 | 490 | 18.775510 |
| Bdellovibrionota | 4 | 9 | 44.444444 |
| Chloroflexi | 8 | 9 | 88.888889 |
| Crenarchaeota | 1 | 7 | 14.285714 |
| Cyanobacteriota | 18 | 53 | 33.962264 |
| Dependentiae | 1 | 1 | 100.000000 |
| Desulfobacterota | 41 | 185 | 22.162162 |
| Halobacterota | 1 | 35 | 2.857143 |
| Myxococcota | 2 | 5 | 40.000000 |
| Patescibacteria | 38 | 60 | 63.333333 |
| Planctomycetota | 60 | 70 | 85.714286 |
| Pseudomonadota | 349 | 1176 | 29.676871 |
| Rs-K70 termite group | 16 | 16 | 100.000000 |
| Spirochaetota | 2 | 7 | 28.571429 |
| Synergistota | 13 | 28 | 46.428571 |
| Thermoplasmatota | 2 | 3 | 66.666667 |
| Verrucomicrobiota | 6 | 35 | 17.142857 |
Number of bacterial species
genome_metadata %>%
filter(domain == "Bacteria")%>%
dplyr::select(species) %>%
unique() %>%
pull() %>%
length() %>%
cat()162
Total percentage of MAGs without species-level annotation
nonspecies <- genome_metadata %>%
filter(species == "") %>%
summarize(Mag_nospecies = n()) %>%
pull()
perct <- nonspecies*100/nmags
cat(perct)0
MAGs without species-level annotation
total_mag_phylum <- genome_metadata %>%
group_by(phylum) %>%
summarize(MAGs_total = n())
genome_metadata %>%
filter(is.na(species)) %>%
group_by(phylum) %>%
summarize(MAGs_nospecies = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(species_annotated=MAGs_total-MAGs_nospecies) %>%
mutate(Percentage_nospecies=100*MAGs_nospecies/MAGs_total) %>%
mutate(Percentage_species=100-100*MAGs_nospecies/MAGs_total)%>%
tt()| phylum | MAGs_nospecies | MAGs_total | species_annotated | Percentage_nospecies | Percentage_species |
|---|---|---|---|---|---|
| Actinomycetota | 228 | 251 | 23 | 90.83665 | 9.163347 |
| Apal-E12 | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Armatimonadota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Bacillota | 1288 | 1334 | 46 | 96.55172 | 3.448276 |
| Bacteroidota | 468 | 490 | 22 | 95.51020 | 4.489796 |
| Bdellovibrionota | 9 | 9 | 0 | 100.00000 | 0.000000 |
| Campylobacterota | 10 | 13 | 3 | 76.92308 | 23.076923 |
| Chloroflexi | 9 | 9 | 0 | 100.00000 | 0.000000 |
| Crenarchaeota | 7 | 7 | 0 | 100.00000 | 0.000000 |
| Cyanobacteriota | 51 | 53 | 2 | 96.22642 | 3.773585 |
| Deferribacterota | 9 | 9 | 0 | 100.00000 | 0.000000 |
| Deinococcota | 5 | 5 | 0 | 100.00000 | 0.000000 |
| Dependentiae | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Desulfobacterota | 185 | 185 | 0 | 100.00000 | 0.000000 |
| Elusimicrobiota | 5 | 5 | 0 | 100.00000 | 0.000000 |
| Euryarchaeota | 3 | 3 | 0 | 100.00000 | 0.000000 |
| Fusobacteriota | 15 | 19 | 4 | 78.94737 | 21.052632 |
| Halobacterota | 35 | 35 | 0 | 100.00000 | 0.000000 |
| Myxococcota | 5 | 5 | 0 | 100.00000 | 0.000000 |
| Nanohaloarchaeota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| Patescibacteria | 60 | 60 | 0 | 100.00000 | 0.000000 |
| Planctomycetota | 70 | 70 | 0 | 100.00000 | 0.000000 |
| Pseudomonadota | 1082 | 1176 | 94 | 92.00680 | 7.993197 |
| Rs-K70 termite group | 16 | 16 | 0 | 100.00000 | 0.000000 |
| Spirochaetota | 7 | 7 | 0 | 100.00000 | 0.000000 |
| Sumerlaeota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Synergistota | 28 | 28 | 0 | 100.00000 | 0.000000 |
| Thermoplasmatota | 3 | 3 | 0 | 100.00000 | 0.000000 |
| Verrucomicrobiota | 35 | 35 | 0 | 100.00000 | 0.000000 |
ASVs in single species
bats=c("Eb", "Pk", "Ha")
# Initialize an empty results data frame
single_sp <- data.frame(
Bat = character(),
Single_species = numeric()
)
table_upset_analysis <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample")
unique_all <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1)
single_sp <- rbind(
single_sp,
data.frame(
Bat = "Total", # Label for the total value
Single_species = nrow(unique_all) # Aggregate sum of column sums
)
)
for (bat in bats) {
unique <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1) %>%
select(bat) %>%
filter(.>0) %>%
nrow()
# Add results to the results data frame
single_sp <- rbind(
single_sp,
data.frame(
Bat = bat,
Single_species = unique # Aggregate sum of column sums
)
)
}ASVs in a single individual
single_ind <- data.frame(
Bat = character(),
Single_individual = numeric()
)
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>% # Calculate row sum for specific columns
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
filter(row_sum==1)
single_ind <- rbind(
single_ind,
data.frame(
Bat = "Total",
Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
)
)
for (bat in bats) {
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
select(bat) %>%
filter(.==1)
single_ind <- rbind(
single_ind,
data.frame(
Bat = bat,
Single_individual = nrow(singleton_filt) # Aggregate sum of column sums
)
)
}7.4 Summary table
summary_table <- total_mags %>%
left_join(., no_annotation, by="Bat") %>%
left_join(., single_ind, by="Bat") %>%
left_join(., single_sp, by="Bat")
summary_table Bat MAGs Phylum Family Genus No_genus No_species Single_individual Single_species
1 Total 3834 30 328 732 1232 3639 34 3387
2 Eb 1381 19 158 294 481 1326 14 1093
3 Pk 1523 25 217 408 511 1443 11 1140
4 Ha 1500 24 239 496 361 1387 9 1154
summary_table %>%
select(-Phylum,-Family, -Genus) %>%
rowwise() %>%
mutate(No_genus_perc=No_genus*100/MAGs)%>%
mutate(No_species_perc=No_species*100/MAGs) %>%
mutate(Single_individual_perc=Single_individual*100/MAGs)%>%
mutate(Single_species_perc=Single_species*100/MAGs) %>%
mutate(Single_individual_per_unique=Single_individual*100/Single_species) %>%
select(1,6:11)# A tibble: 4 × 7
# Rowwise:
Bat Single_species No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_unique
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Total 3387 32.1 94.9 0.887 88.3 1.00
2 Eb 1093 34.8 96.0 1.01 79.1 1.28
3 Pk 1140 33.6 94.7 0.722 74.9 0.965
4 Ha 1154 24.1 92.5 0.6 76.9 0.780